{
 "cells": [
  {
   "cell_type": "markdown",
   "id": "cad5bd42-04d7-4dc0-8c6a-c6dae03e35a2",
   "metadata": {},
   "source": [
    "# Humanoids in Habitat\n",
    "This tutorial covers how to initialize and interact with humanoids in Habitat. Humanoids are represented as articulated agents with a deformable mesh. As such, they can interact in the environment the same way as describe in the ao_tutorial. While you can update the humanoid articulations freely, you may want to generate realistic motions. For this, we provide a set of controllers that update the humanoid joints to represent realistic behaviors. In this tutorial we will cover:\n",
    "- How to initialize a humanoid in the scene\n",
    "- Using humanoid controllers\n",
    "- Executing humanoid actions"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "e6862ee3-8667-40b7-b770-50fc84dfeb0f",
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import habitat_sim\n",
    "import magnum as mn\n",
    "import warnings\n",
    "from habitat.tasks.rearrange.rearrange_sim import RearrangeSim\n",
    "warnings.filterwarnings('ignore')\n",
    "from habitat_sim.utils.settings import make_cfg\n",
    "from matplotlib import pyplot as plt\n",
    "from habitat_sim.utils import viz_utils as vut\n",
    "from omegaconf import DictConfig\n",
    "import numpy as np\n",
    "from habitat.articulated_agents.robots import FetchRobot\n",
    "from habitat.config.default import get_agent_config\n",
    "from habitat.config.default_structured_configs import ThirdRGBSensorConfig, HeadRGBSensorConfig, HeadPanopticSensorConfig\n",
    "from habitat.config.default_structured_configs import SimulatorConfig, HabitatSimV0Config, AgentConfig\n",
    "from habitat.config.default import get_agent_config\n",
    "import habitat\n",
    "from habitat_sim.physics import JointMotorSettings, MotionType\n",
    "from omegaconf import OmegaConf\n",
    "from habitat.articulated_agent_controllers import (\n",
    "    HumanoidRearrangeController,\n",
    "    HumanoidSeqPoseController,\n",
    ")\n",
    "from habitat.config.default_structured_configs import HumanoidJointActionConfig, HumanoidPickActionConfig\n",
    "\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "de15dcad-6cf0-403e-9a27-8c80379158f6",
   "metadata": {},
   "outputs": [],
   "source": [
    "from habitat.config.default_structured_configs import TaskConfig, EnvironmentConfig, DatasetConfig, HabitatConfig\n",
    "from habitat.config.default_structured_configs import ArmActionConfig, BaseVelocityActionConfig, OracleNavActionConfig\n",
    "from habitat.core.env import Env\n",
    "def make_sim_cfg(agent_dict):\n",
    "    # Start the scene config\n",
    "    sim_cfg = SimulatorConfig(type=\"RearrangeSim-v0\")\n",
    "    \n",
    "    # This is for better graphics\n",
    "    sim_cfg.habitat_sim_v0.enable_hbao = True\n",
    "    sim_cfg.habitat_sim_v0.enable_physics = True\n",
    "\n",
    "    \n",
    "    # Set up an example scene\n",
    "    sim_cfg.scene = \"data/hab3_bench_assets/hab3-hssd/scenes/103997919_171031233.scene_instance.json\"\n",
    "    sim_cfg.scene_dataset = \"data/hab3_bench_assets/hab3-hssd/hab3-hssd.scene_dataset_config.json\"\n",
    "    sim_cfg.additional_object_paths = ['data/objects/ycb/configs/']\n",
    "\n",
    "    \n",
    "    cfg = OmegaConf.create(sim_cfg)\n",
    "\n",
    "    # Set the scene agents\n",
    "    cfg.agents = agent_dict\n",
    "    cfg.agents_order = list(cfg.agents.keys())\n",
    "    return cfg\n",
    "\n",
    "def make_hab_cfg(agent_dict, action_dict):\n",
    "    sim_cfg = make_sim_cfg(agent_dict)\n",
    "    task_cfg = TaskConfig(type=\"RearrangeEmptyTask-v0\")\n",
    "    task_cfg.actions = action_dict\n",
    "    env_cfg = EnvironmentConfig()\n",
    "    dataset_cfg = DatasetConfig(type=\"RearrangeDataset-v0\", data_path=\"data/hab3_bench_assets/episode_datasets/small_large.json.gz\")\n",
    "    \n",
    "    \n",
    "    hab_cfg = HabitatConfig()\n",
    "    hab_cfg.environment = env_cfg\n",
    "    hab_cfg.task = task_cfg\n",
    "    hab_cfg.dataset = dataset_cfg\n",
    "    hab_cfg.simulator = sim_cfg\n",
    "    hab_cfg.simulator.seed = hab_cfg.seed\n",
    "\n",
    "    return hab_cfg\n",
    "\n",
    "def init_rearrange_env(agent_dict, action_dict):\n",
    "    hab_cfg = make_hab_cfg(agent_dict, action_dict)\n",
    "    res_cfg = OmegaConf.create(hab_cfg)\n",
    "    return Env(res_cfg)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b9bfaa1b-3c6c-4071-9a82-b6c0919b069f",
   "metadata": {},
   "source": [
    "# Initializing humanoids in the scene\n",
    "As any articulated agent, humanoids are defined in Habitat via a URDF file. Our current URDF is based on the SMPL-X body model, with 54 rotation joints indicating relative rotation of different body parts. You can find the joint information [here](https://github.com/vchoutas/smplx/blob/main/smplx/joint_names.py#L19). Every human body will have its own URDF, and we provide scripts to generate new URDFs for arbitrary body shapes.\n",
    "We use `KinematicHumanoid` to represent humanoids, which allows to control the humanoid kinematically. \n",
    "\n",
    "Additionally, every humanoid has a motion_data file, which contains information about the joint rotations to enable the humanoid to walk or reach to objects. We will be using this file later.\n",
    "\n",
    "We will start defining an environment and adding a humanoid agent. As shown in the ao_tutorial, we need to define the action space. For humanoids we provide a basic action space, called HumanoidJointAction which kinematically updates the humanoid joints."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "be285467-a4fd-4030-b4f3-1c65e4bfd1ad",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the agent configuration\n",
    "main_agent_config = AgentConfig()\n",
    "urdf_path = \"data/hab3_bench_assets/humanoids/female_0/female_0.urdf\"\n",
    "main_agent_config.articulated_agent_urdf = urdf_path\n",
    "main_agent_config.articulated_agent_type = \"KinematicHumanoid\"\n",
    "main_agent_config.motion_data_path = \"data/hab3_bench_assets/humanoids/female_0/female_0_motion_data_smplx.pkl\"\n",
    "\n",
    "\n",
    "# Define sensors that will be attached to this agent, here a third_rgb sensor and a head_rgb.\n",
    "# We will later talk about why giving the sensors these names\n",
    "main_agent_config.sim_sensors = {\n",
    "    \"third_rgb\": ThirdRGBSensorConfig(),\n",
    "    \"head_rgb\": HeadRGBSensorConfig(),\n",
    "}\n",
    "\n",
    "# We create a dictionary with names of agents and their corresponding agent configuration\n",
    "agent_dict = {\"main_agent\": main_agent_config}\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "f1b48cc8-397a-4375-bb85-bd398f8bae3a",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the actions\n",
    "\n",
    "action_dict = {\n",
    "    \"humanoid_joint_action\": HumanoidJointActionConfig()\n",
    "}\n",
    "env = init_rearrange_env(agent_dict, action_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "46ed9c42-5c48-4180-899c-45bb33270aae",
   "metadata": {},
   "source": [
    "When resetting the environment, the humanoid is standing by default"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "4f2c27d7-70c6-4d95-8838-c3e40e24a135",
   "metadata": {},
   "outputs": [],
   "source": [
    "obs = env.reset()\n",
    "_, ax = plt.subplots(1,len(obs.keys()))\n",
    "\n",
    "for ind, name in enumerate(obs.keys()):\n",
    "    ax[ind].imshow(obs[name])\n",
    "    ax[ind].set_axis_off()\n",
    "    ax[ind].set_title(name)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "18075047-30b6-424e-a787-6691873966ce",
   "metadata": {},
   "source": [
    "Like other articulated agents, we can rotate and move their base."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "90810689-65aa-432e-b923-a850d6f2c78c",
   "metadata": {},
   "outputs": [],
   "source": [
    "sim = env.sim\n",
    "observations = []\n",
    "num_iter = 100\n",
    "pos_delta = mn.Vector3(0.02,0,0)\n",
    "rot_delta = np.pi / (8 * num_iter)\n",
    "art_agent = sim.articulated_agent\n",
    "sim.reset()\n",
    "# set_fixed_camera(sim)\n",
    "for _ in range(num_iter):\n",
    "    # TODO: this actually seems to give issues...\n",
    "    art_agent.base_pos = art_agent.base_pos + pos_delta\n",
    "    art_agent.base_rot = art_agent.base_rot + rot_delta\n",
    "    sim.step({})\n",
    "    observations.append(sim.get_sensor_observations())\n",
    "\n",
    "\n",
    "vut.make_video(\n",
    "    observations,\n",
    "    \"third_rgb\",\n",
    "    \"color\",\n",
    "    \"robot_tutorial_video\",\n",
    "    open_vid=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "780722b7-c079-42a1-b3d3-a84e92e518f0",
   "metadata": {},
   "source": [
    "We can also use the HumanoidJointAction to sample new rotations and joints"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d354e607-d2ab-41a1-8add-603c6a9d5327",
   "metadata": {},
   "outputs": [],
   "source": [
    "# TODO: maybe we can make joint_action a subclass of dict, and have a custom function for it\n",
    "import random\n",
    "def random_rotation():\n",
    "    random_dir = mn.Vector3(np.random.rand(3)).normalized()\n",
    "    random_angle = random.random() * np.pi\n",
    "    random_rat = mn.Quaternion.rotation(mn.Rad(random_angle), random_dir)\n",
    "    return random_rat\n",
    "def custom_sample_humanoid():\n",
    "    base_transform = mn.Matrix4() \n",
    "    random_rot = random_rotation()\n",
    "    offset_transform = mn.Matrix4.from_(random_rot.to_matrix(), mn.Vector3())\n",
    "    joints = []\n",
    "    num_joints = 54\n",
    "    for _ in range(num_joints):\n",
    "        Q = random_rotation()\n",
    "        joints = joints + list(Q.vector) + [float(Q.scalar)]\n",
    "    offset_trans = list(np.asarray(offset_transform.transposed()).flatten())\n",
    "    base_trans = list(np.asarray(base_transform.transposed()).flatten())\n",
    "    random_vec = joints + offset_trans + base_trans\n",
    "    return {\n",
    "        \"human_joints_trans\": random_vec\n",
    "    }\n",
    "    "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "97ff2b3a-5206-4bcc-b5c5-6a12b1711539",
   "metadata": {},
   "outputs": [],
   "source": [
    "# We can now call the defined actions\n",
    "observations = []\n",
    "num_iter = 40\n",
    "env.reset()\n",
    "for _ in range(num_iter):\n",
    "    params = custom_sample_humanoid()\n",
    "    action_dict = {\n",
    "        \"action\": \"humanoid_joint_action\",\n",
    "        \"action_args\": params\n",
    "    }\n",
    "    observations.append(env.step(action_dict))\n",
    "vut.make_video(\n",
    "    observations,\n",
    "    \"third_rgb\",\n",
    "    \"color\",\n",
    "    \"robot_tutorial_video\",\n",
    "    open_vid=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "23391691-21fd-466f-acf9-cd16356d3dcc",
   "metadata": {},
   "source": [
    "# Humanoid Controllers\n",
    "While you can manually set the agent joints and rotations, or train a policy to set them, in many cases you may just be interested in generating realistic motions, either coming from a separate model or from motion capture data. For this, we introduce the HumanoidControllers, which are helper classes to calculate humanoid poses. We currently provide two of them, which we cover here."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b9d5c073-3d83-4b28-aebd-67da6299fe38",
   "metadata": {},
   "source": [
    "## HumanoidSeqPoseController\n",
    "This pose controller is designed to play a motion sequence, which can come from a model or a motion capture file. For this, you will need a `.pkl` motion file containing a sequence of poses. You can find [here](https://github.com/facebookresearch/habitat-lab/tree/main/habitat-lab/habitat/articulated_agents/humanoids#sequentialposecontroller) how to convert a motion file with a format commonly used in SMPL-X (including AMASS or Motion Diffusion Models) into a `.pkl` which can be consumed in by the HumanoidSeqPoseController."
   ]
  },
  {
   "cell_type": "markdown",
   "id": "6390246f-230a-4de5-b619-f972f9c236aa",
   "metadata": {},
   "source": [
    "### Convert Motion File"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "0a1dd703-a895-48e5-9cbf-3d1574ae2ee7",
   "metadata": {},
   "outputs": [],
   "source": [
    "from habitat.utils.humanoid_utils import MotionConverterSMPLX\n",
    "PATH_TO_URDF = \"data/humanoids/humanoid_data/female_2/female_2.urdf\"\n",
    "PATH_TO_MOTION_NPZ = \"data/humanoids/humanoid_data/walk_motion/CMU_10_04_stageii.npz\"\n",
    "convert_helper = MotionConverterSMPLX(urdf_path=PATH_TO_URDF)\n",
    "convert_helper.convert_motion_file(\n",
    "    motion_path=PATH_TO_MOTION_NPZ,\n",
    "    output_path=PATH_TO_MOTION_NPZ.replace(\".npz\", \"\"),\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "b0cff0a6-4081-416f-8224-80d870905201",
   "metadata": {},
   "source": [
    "### Initialize Controller"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "82881361-2efe-4285-9063-cf8510dd7cf3",
   "metadata": {},
   "outputs": [],
   "source": [
    "env.reset()\n",
    "motion_path = \"data/humanoids/humanoid_data/walk_motion/CMU_10_04_stageii.pkl\" \n",
    "# We define here humanoid controller\n",
    "humanoid_controller = HumanoidSeqPoseController(motion_path)\n",
    "\n",
    "\n",
    "# Because we want the humanoid controller to generate a motion relative to the current agent, we need to set\n",
    "# the reference pose\n",
    "humanoid_controller.reset(env.sim.articulated_agent.base_transformation)\n",
    "humanoid_controller.apply_base_transformation(env.sim.articulated_agent.base_transformation)\n"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "8f62c0ab-2164-4dde-a396-36a74d00f1df",
   "metadata": {},
   "source": [
    "### Get pose sequence"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "d6a3ac16-7161-4d8a-817c-20ccea64fae3",
   "metadata": {},
   "outputs": [],
   "source": [
    "observations = []\n",
    "for _ in range(humanoid_controller.humanoid_motion.num_poses):\n",
    "    # These computes the current pose and calculates the next pose\n",
    "    humanoid_controller.calculate_pose()\n",
    "    humanoid_controller.next_pose()\n",
    "    \n",
    "    # The get_pose function gives as a humanoid pose in the same format as HumanoidJointAction\n",
    "    new_pose = humanoid_controller.get_pose()\n",
    "    action_dict = {\n",
    "        \"action\": \"humanoid_joint_action\",\n",
    "        \"action_args\": {\"human_joints_trans\": new_pose}\n",
    "    }\n",
    "    observations.append(env.step(action_dict))\n",
    "    \n",
    "vut.make_video(\n",
    "    observations,\n",
    "    \"third_rgb\",\n",
    "    \"color\",\n",
    "    \"robot_tutorial_video\",\n",
    "    open_vid=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "0318eaf2-8200-4dcd-bd5a-5723efd506c6",
   "metadata": {},
   "source": [
    "### HumanoidRearrangeController\n",
    "The second controller we provide is used to navigate and interact with objects in the environment. "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1633b679-e3a6-48ab-8e3f-aa02725f55c1",
   "metadata": {},
   "outputs": [],
   "source": [
    "# As before, we first define the controller, here we use a special motion file we provide for each agent.\n",
    "motion_path = \"data/hab3_bench_assets/humanoids/female_0/female_0_motion_data_smplx.pkl\" \n",
    "# We define here humanoid controller\n",
    "humanoid_controller = HumanoidRearrangeController(motion_path)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "c8220846-279f-420d-b711-45d499b878e4",
   "metadata": {},
   "source": [
    "We can use this controller to generate motion to walk to a given direction"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "1f0ca9fb-100e-4c6a-bf34-83b23f8110c0",
   "metadata": {},
   "outputs": [],
   "source": [
    "# We reset the controller\n",
    "env.reset()\n",
    "humanoid_controller.reset(env.sim.articulated_agent.base_transformation)\n",
    "observations = []\n",
    "print(env.sim.articulated_agent.base_pos)\n",
    "for _ in range(100):\n",
    "    # This computes a pose that moves the agent to relative_position\n",
    "    relative_position = env.sim.articulated_agent.base_pos + mn.Vector3(0,0,1)\n",
    "    humanoid_controller.calculate_walk_pose(relative_position)\n",
    "    \n",
    "    # The get_pose function gives as a humanoid pose in the same format as HumanoidJointAction\n",
    "    new_pose = humanoid_controller.get_pose()\n",
    "    action_dict = {\n",
    "        \"action\": \"humanoid_joint_action\",\n",
    "        \"action_args\": {\"human_joints_trans\": new_pose}\n",
    "    }\n",
    "    observations.append(env.step(action_dict))\n",
    "    \n",
    "vut.make_video(\n",
    "    observations,\n",
    "    \"third_rgb\",\n",
    "    \"color\",\n",
    "    \"robot_tutorial_video\",\n",
    "    open_vid=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "d3daaefe-d3d0-490e-8314-cda4092af7a2",
   "metadata": {},
   "source": [
    "The controller can also be used to reach different positions with the hand"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "c522a794-7fa9-41bc-b801-e3a9071285ab",
   "metadata": {},
   "outputs": [],
   "source": [
    "# We reset the controller\n",
    "env.reset()\n",
    "humanoid_controller.reset(env.sim.articulated_agent.base_transformation)\n",
    "observations = []\n",
    "print(env.sim.articulated_agent.base_pos)\n",
    "\n",
    "# Get the hand pose\n",
    "offset =  env.sim.articulated_agent.base_transformation.transform_vector(mn.Vector3(0, 0.3, 0))\n",
    "hand_pose = env.sim.articulated_agent.ee_transform(0).translation + offset\n",
    "for _ in range(100):\n",
    "    # This computes a pose that moves the agent to relative_position\n",
    "    hand_pose = hand_pose + mn.Vector3((np.random.rand(3) - 0.5) * 0.1)\n",
    "    humanoid_controller.calculate_reach_pose(hand_pose, index_hand=0)\n",
    "    \n",
    "    # The get_pose function gives as a humanoid pose in the same format as HumanoidJointAction\n",
    "    new_pose = humanoid_controller.get_pose()\n",
    "    action_dict = {\n",
    "        \"action\": \"humanoid_joint_action\",\n",
    "        \"action_args\": {\"human_joints_trans\": new_pose}\n",
    "    }\n",
    "    observations.append(env.step(action_dict))\n",
    "    \n",
    "vut.make_video(\n",
    "    observations,\n",
    "    \"third_rgb\",\n",
    "    \"color\",\n",
    "    \"robot_tutorial_video\",\n",
    "    open_vid=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "26e7c632-9193-4d7f-a326-80e067dad2fd",
   "metadata": {},
   "source": [
    "# Executing Humanoid Actions\n",
    "While you can use controllers to animate the humanoids, we also provide a set of actions that control the humanoid to navigate around the scene or pick and place objects."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "6e6bc11b-cb78-4097-b2df-f850b8f1ae94",
   "metadata": {},
   "outputs": [],
   "source": [
    "# Define the actions\n",
    "\n",
    "\n",
    "action_dict = {\n",
    "    \"humanoid_joint_action\": HumanoidJointActionConfig(),\n",
    "    \"humanoid_navigate_action\": OracleNavActionConfig(type=\"OracleNavCoordinateAction\", \n",
    "                                                      motion_control=\"human_joints\",\n",
    "                                                      spawn_max_dist_to_obj=1.0),\n",
    "    \"humanoid_pick_obj_id_action\": HumanoidPickActionConfig(type=\"HumanoidPickObjIdAction\")\n",
    "    \n",
    "}\n",
    "env = init_rearrange_env(agent_dict, action_dict)"
   ]
  },
  {
   "cell_type": "markdown",
   "id": "eb861e6e-0bac-4ffd-9779-032b64f1581a",
   "metadata": {},
   "source": [
    "We can now navigate to a target object"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "8a6478a0-e3f8-46ca-a9a5-6cea7eb39208",
   "metadata": {},
   "outputs": [],
   "source": [
    "env.reset()\n",
    "rom = env.sim.get_rigid_object_manager()\n",
    "# env.sim.articulated_agent.base_pos = init_pos\n",
    "# As before, we get a navigation point next to an object id\n",
    "\n",
    "obj_id = env.sim.scene_obj_ids[0]\n",
    "first_object = rom.get_object_by_id(obj_id)\n",
    "\n",
    "object_trans = first_object.translation\n",
    "print(first_object.handle, \"is in\", object_trans)\n",
    "# TODO: unoccluded object did not work\n",
    "# print(sample)\n",
    "observations = []\n",
    "delta = 2.0\n",
    "\n",
    "object_agent_vec = env.sim.articulated_agent.base_pos - object_trans\n",
    "object_agent_vec.y = 0\n",
    "dist_agent_object = object_agent_vec.length()\n",
    "# Walk towards the object\n",
    "\n",
    "agent_displ = np.inf\n",
    "agent_rot = np.inf\n",
    "prev_rot = env.sim.articulated_agent.base_rot\n",
    "prev_pos = env.sim.articulated_agent.base_pos\n",
    "while agent_displ > 1e-9 or agent_rot > 1e-9:\n",
    "    prev_rot = env.sim.articulated_agent.base_rot\n",
    "    prev_pos = env.sim.articulated_agent.base_pos\n",
    "    action_dict = {\n",
    "        \"action\": (\"humanoid_navigate_action\"), \n",
    "        \"action_args\": {\n",
    "              \"oracle_nav_lookat_action\": object_trans,\n",
    "              \"mode\": 1\n",
    "          }\n",
    "    }\n",
    "    observations.append(env.step(action_dict))\n",
    "    \n",
    "    cur_rot = env.sim.articulated_agent.base_rot\n",
    "    cur_pos = env.sim.articulated_agent.base_pos\n",
    "    agent_displ = (cur_pos - prev_pos).length()\n",
    "    agent_rot = np.abs(cur_rot - prev_rot)\n",
    "    \n",
    "# Wait\n",
    "for _ in range(20):\n",
    "    action_dict = {\"action\": (), \"action_args\": {}}\n",
    "    observations.append(env.step(action_dict))\n",
    "\n",
    "# Pick object\n",
    "observations.append(env.step(action_dict))\n",
    "for _ in range(100):\n",
    "    \n",
    "    action_dict = {\"action\": (\"humanoid_pick_obj_id_action\"), \"action_args\": {\"humanoid_pick_obj_id\": obj_id}}\n",
    "    observations.append(env.step(action_dict)) \n",
    "    \n",
    "vut.make_video(\n",
    "    observations,\n",
    "    \"third_rgb\",\n",
    "    \"color\",\n",
    "    \"robot_tutorial_video\",\n",
    "    open_vid=True,\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "id": "2c225a93-ffe8-4b34-b421-daf1185f40ef",
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3 (ipykernel)",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.9.18"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 5
}
